/*------------------------------------------------------------------------------
* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
*
* Distributable under the terms of either the Apache License (Version 2.0) or
* the GNU Lesser General Public License, as specified in the LICENSE file.
------------------------------------------------------------------------------*/
#include "../Lucene.hpp"
#include "AnalysisHeader.hpp"
#include "../util/StringBuffer.hpp"
#include "unicode/ustring.h"
#include <stdlib.h>

// Declares the UChar string cStrDefaultType for the 4-character text "word".
// The matching U_STRING_INIT call is made in Token::InitStringDecl().
U_STRING_DECL(cStrDefaultType, "word", 4);
CL_NS_DEF(analysis)
// Runs the U_STRING_INIT counterpart of the cStrDefaultType declaration.
// NOTE(review): on some ICU configurations the macro presumably fills the
// string at run time, so this should be called before Token::defaultType is
// read - confirm against the ICU ustring.h documentation.
void Token::InitStringDecl()
{
	U_STRING_INIT(cStrDefaultType, "word", 4);
}
CL_NS_END

using namespace Common;
CL_NS_USE(util)

CL_NS_DEF(analysis)

// Type assigned to tokens created with the default constructor; points at the
// "word" string declared as cStrDefaultType.
const UChar *Token::defaultType = cStrDefaultType;

// Creates an empty token: offsets 0, type defaultType, position increment 1
// and zero-length term text.
Token::Token():
		_startOffset(0),
		_endOffset(0),
		_type(defaultType),
		positionIncrement(1)
{
#ifdef LUCENE_TOKEN_WORD_LENGTH
	// Fixed-size buffer: record its capacity and start with an empty string.
	bufferTextLen = LUCENE_TOKEN_WORD_LENGTH + 1;
	_termText[0] = 0;
#else
	// Growable buffer: nothing is allocated until text is set.
	bufferTextLen = 0;
	_termText = NULL;
#endif
	_termTextLen = 0;
}

// Releases the term text buffer when it is heap-allocated, i.e. when
// LUCENE_TOKEN_WORD_LENGTH is not defined.
Token::~Token()
{
#if !defined(LUCENE_TOKEN_WORD_LENGTH)
	// Allocated by growBuffer(); still NULL if no text was ever set.
	::free(_termText);
#endif
}

// Creates a token with the given term text, start/end offsets and type.
// The position increment starts at 1 and the text is copied via setText().
Token::Token(const UChar *text, const int32_t start, const int32_t end, const UChar *typ):
		_startOffset(start),
		_endOffset(end),
		_type(typ),
		positionIncrement(1)
{
#ifdef LUCENE_TOKEN_WORD_LENGTH
	// Fixed-size buffer: record its capacity and start with an empty string.
	bufferTextLen = LUCENE_TOKEN_WORD_LENGTH + 1;
	_termText[0] = 0;
#else
	// Growable buffer: setText() below performs the first allocation.
	bufferTextLen = 0;
	_termText = NULL;
#endif
	_termTextLen = 0;
	setText(text);
}

// Re-initialises this token in place: stores the new offsets and type, resets
// the position increment to 1 and copies the new term text via setText().
void Token::set(const UChar *text, const int32_t start, const int32_t end, const UChar *typ)
{
	positionIncrement = 1;
	_type = typ;
	_endOffset = end;
	_startOffset = start;

	setText(text);
}

void Token::setText(const UChar *text)
{
	_termTextLen = _tcslen(text);

#ifndef LUCENE_TOKEN_WORD_LENGTH
	growBuffer(_termTextLen + 1);
	_tcsncpy(_termText, text, _termTextLen + 1);
#else
	if (_termTextLen > LUCENE_TOKEN_WORD_LENGTH) {
		//in the case where this occurs, we will leave the endOffset as it is
		//since the actual word still occupies that space.
		_termTextLen = LUCENE_TOKEN_WORD_LENGTH;
	}
	_tcsncpy(_termText, text, _termTextLen + 1);
#endif
	_termText[_termTextLen] = 0; //make sure null terminated
}

void Token::growBuffer(size_t size)
{
	if (bufferTextLen >= size)
		return;
#ifndef LUCENE_TOKEN_WORD_LENGTH
	if (_termText == NULL)
		_termText = (UChar*)::malloc(size * sizeof(UChar));
	else
		_termText = (UChar*)::realloc(_termText, size * sizeof(UChar));
	bufferTextLen = size;
#else
	_CLTHROWA(CL_ERR_TokenMgr, "Couldn't grow Token buffer");
#endif
}

// Stores a new position increment for this token.
// Raises CL_ERR_IllegalArgument when posIncr is negative.
void Token::setPositionIncrement(int32_t posIncr)
{
	const bool isNegative = (posIncr < 0);
	if (isNegative) {
		_CLTHROWA(CL_ERR_IllegalArgument, "positionIncrement must be >= 0");
	}

	positionIncrement = posIncr;
}

// Returns the position increment currently stored in this token.
int32_t Token::getPositionIncrement() const
{
	return this->positionIncrement;
}

// Read-only access to the token's term text buffer.
// The result is NULL for a token that never received text when no fixed-size
// buffer (LUCENE_TOKEN_WORD_LENGTH) is compiled in.
const UChar *Token::termText() const
{
	const UChar *text = _termText;
	return text;
}
// Returns the length of the term text.
// When the cached length holds the -1 sentinel written by resetTermTextLen(),
// it is recomputed from the buffer with _tcslen() and cached again.
size_t Token::termTextLength()
{
	if (_termTextLen != -1)
		return _termTextLen;

	// cached length was invalidated; measure the buffer again
	_termTextLen = _tcslen(_termText);
	return _termTextLen;
}
void Token::resetTermTextLen()
{
	_termTextLen = -1;
}

/*
bool Token::OrderCompare::operator()(Token* t1, Token* t2) const
{
	if (t1->startOffset() > t2->startOffset())
		return false;
	if (t1->startOffset() < t2->startOffset())
		return true;
	return true;
}
*/

// Builds a textual form of this token: "(text,start,end[,type=T][,posIncr=N])".
// The type is only included when it differs from defaultType, and the position
// increment only when it is not 1.
// Returns whatever StringBuffer::toString() yields.
// NOTE(review): presumably a newly allocated buffer owned by the caller - confirm.
UChar *Token::toString() const
{
	StringBuffer sb;
	sb.append(_T("("));

	// The term text pointer is NULL for a token that never received text when
	// no fixed-size buffer is compiled in; skip it rather than append NULL.
	const UChar *text = termText();
	if (text != NULL)
		sb.append(text);

	sb.append(_T(","));
	sb.appendInt(_startOffset);
	sb.append(_T(","));
	sb.appendInt(_endOffset);

	// Same condition as the former "!_tcscmp(...) == 0", written without
	// relying on '!' binding tighter than '=='.
	if (_tcscmp(_type, defaultType) != 0) {
		sb.append(_T(",type="));
		sb.append(_type);
	}
	if (positionIncrement != 1) {
		sb.append(_T(",posIncr="));
		sb.appendInt(positionIncrement);
	}
	sb.append(_T(")"));

	return sb.toString();
}

// Wraps the token stream `in`. `deleteTS` records whether close() should also
// delete the wrapped stream.
TokenFilter::TokenFilter(TokenStream* in, bool deleteTS)
	: input(in),
	  deleteTokenStream(deleteTS)
{
}
// Closes the wrapped stream on destruction. A call made from the destructor
// always resolves to TokenFilter::close(), which the qualified name spells out.
TokenFilter::~TokenFilter()
{
	TokenFilter::close();
}

// Closes the wrapped TokenStream, deletes it when deleteTokenStream is set,
// and clears the pointer. Does nothing if there is no wrapped stream.
void TokenFilter::close()
{
	if (input == NULL)
		return;

	input->close();
	if (deleteTokenStream) {
		_CLDELETE(input);
	}
	input = NULL;
}


// Creates a tokenizer with no input reader attached.
Tokenizer::Tokenizer():
		input(NULL)
{
}

// Creates a tokenizer that reads from `_input`. Only the pointer is stored.
Tokenizer::Tokenizer(CL_NS(util)::Reader* _input)
	: input(_input)
{
}

// Detaches the input reader by clearing the pointer.
// The reader itself is not deleted here (open question kept from the original
// code: "? delete input;").
void Tokenizer::close()
{
	input = NULL;
}

// Detaches the input reader on destruction. A call made from the destructor
// always resolves to Tokenizer::close(), which the qualified name spells out.
Tokenizer::~Tokenizer()
{
	Tokenizer::close();
}


// Base implementation: reports a position increment gap of 0 for every field.
// fieldName is not consulted here.
int32_t Analyzer::getPositionIncrementGap(const UChar *fieldName)
{
	(void) fieldName; // intentionally unused
	return 0;
}

CL_NS_END
